Code to accompany CTP All-Hands talk.
covdata packageDetails on obtaining and installing the covdata package can be found at the package website. In addition to covdata, to reproduce the analysis here you will need the tidyverse tools for R and the patchwork package. To reproduce this document you will also need the distill package.
library(tidyverse)
library(covdata)
theme_covid <- function(){
theme_minimal(base_size=10) %+replace%
theme(
legend.position = "top"
)
}
theme_set(theme_covid())
The main COVID Tracking Project data file is in long format in covdata.
covus
# A tibble: 345,506 x 7
date state fips data_quality_gr… measure count
<date> <chr> <chr> <chr> <chr> <dbl>
1 2020-10-02 AK 02 A positi… 9040
2 2020-10-02 AK 02 A negati… 460250
3 2020-10-02 AK 02 A pending NA
4 2020-10-02 AK 02 A hospit… 41
5 2020-10-02 AK 02 A hospit… NA
6 2020-10-02 AK 02 A in_icu… NA
7 2020-10-02 AK 02 A in_icu… NA
8 2020-10-02 AK 02 A on_ven… 6
9 2020-10-02 AK 02 A on_ven… NA
10 2020-10-02 AK 02 A recove… 5042
# … with 345,496 more rows, and 1 more variable: measure_label <chr>
measures <- c("positive", "negative", "death")
covus %>%
filter(measure %in% measures, state == "NY") %>%
select(date, state, measure, count) %>%
pivot_wider(names_from = measure, values_from = count) %>%
mutate(across(positive:death, ~.x - lag(.x, order_by = date),
.names = "daily_{col}"))
# A tibble: 213 x 8
date state positive negative death daily_positive
<date> <chr> <dbl> <dbl> <dbl> <dbl>
1 2020-10-02 NY 461629 10514395 25497 1598
2 2020-10-01 NY 460031 10396500 25490 1382
3 2020-09-30 NY 458649 10288664 25479 1000
4 2020-09-29 NY 457649 10191704 25470 1189
5 2020-09-28 NY 456460 10104662 25468 834
6 2020-09-27 NY 455626 10052560 25456 866
7 2020-09-26 NY 454760 9968656 25450 1005
8 2020-09-25 NY 453755 9869708 25446 908
9 2020-09-24 NY 452847 9775798 25439 955
10 2020-09-23 NY 451892 9683800 25437 665
# … with 203 more rows, and 2 more variables: daily_negative <dbl>,
# daily_death <dbl>
We’ll use this in a moment to draw our per capita graph.
state_pops <- uspop %>%
filter(sex_id == "totsex", hisp_id == "tothisp") %>%
select(state_abbr, statefips, pop, state) %>%
rename(name = state,
state = state_abbr, fips = statefips) %>%
mutate(state = replace(state, fips == "11", "DC"))
## Using a convenience function to do something similar
## to the count calculation above
get_daily_count <- function(count, date){
count - lag(count, order_by = date)
}
covus %>%
filter(measure == "death", state %in% unique(state_pops$state)) %>%
group_by(state) %>%
mutate(
deaths_daily = get_daily_count(count, date),
deaths7 = slider::slide_dbl(deaths_daily, mean, .before = 7, .after = 0, na.rm = TRUE)) %>%
left_join(state_pops, by = c("state", "fips")) %>%
filter(date > lubridate::ymd("2020-03-15")) %>%
ggplot(mapping = aes(x = date, y = (deaths7/pop)*1e5)) +
geom_line(size = 0.5) +
scale_y_continuous(labels = scales::comma_format(accuracy = 1)) +
facet_wrap(~ reorder(name, -deaths7/pop), ncol = 8) +
labs(x = "Date",
y = "Deaths per 100,000 Population (Seven Day Rolling Average)",
title = "Average Death Rate per capita from COVID-19: US States and Washington, DC",
subtitle = paste("COVID Tracking Project data as of", format(max(covnat$date), "%A, %B %e, %Y. Seven-day rolling average.")),
caption = "Kieran Healy @kjhealy / Data: https://www.covidtracking.com/") +
theme_minimal()
stmf
# A tibble: 450,945 x 17
country_code cname iso2 continent iso3 year week sex split
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
1 AUT Aust… AT Europe AUT 2000 1 m 0
2 AUT Aust… AT Europe AUT 2000 1 m 0
3 AUT Aust… AT Europe AUT 2000 1 m 0
4 AUT Aust… AT Europe AUT 2000 1 m 0
5 AUT Aust… AT Europe AUT 2000 1 m 0
6 AUT Aust… AT Europe AUT 2000 1 f 0
7 AUT Aust… AT Europe AUT 2000 1 f 0
8 AUT Aust… AT Europe AUT 2000 1 f 0
9 AUT Aust… AT Europe AUT 2000 1 f 0
10 AUT Aust… AT Europe AUT 2000 1 f 0
# … with 450,935 more rows, and 8 more variables: split_sex <dbl>,
# forecast <dbl>, approx_date <date>, age_group <chr>,
# death_count <dbl>, death_rate <dbl>, deaths_total <dbl>,
# rate_total <dbl>
stmf %>%
filter(sex == "b", country_code == "BEL") %>%
group_by(year, week) %>%
mutate(yr_ind = year %in% 2020) %>%
slice(1) %>%
ggplot(aes(x = week, y = deaths_total, color = yr_ind, group = year)) +
geom_line(size = 0.9) +
scale_color_manual(values = c("gray70", "red"), labels = c("2000-2019", "2020")) +
labs(x = "Week of the Year",
y = "Total Deaths",
color = "Year",
title = "Weekly recorded deaths in Belgium, 2000-2020") +
theme_minimal() +
theme(legend.position = "top")
nchs_wdc %>%
filter(jurisdiction %in% c("New York", "New Jersey", "Michigan", "Georgia", "California", "Alabama")) %>%
filter(cause == "All Cause", year > 2014) %>%
group_by(jurisdiction, year, week) %>%
summarize(deaths = sum(n, na.rm = TRUE)) %>%
mutate(yr_ind = year %in% 2020) %>%
filter(!(year == 2020 & week > 35)) %>%
ggplot(aes(x = week, y = deaths, color = yr_ind, group = year)) +
geom_line(size = 0.9) +
scale_color_manual(values = c("gray70", "red"), labels = c("2015-2019", "2020")) +
scale_x_continuous(breaks = c(1, 10, 20, 30, 40, 50), labels = c(1, 10, 20, 30, 40, 50)) +
scale_y_continuous(labels = scales::comma) +
facet_wrap(~ jurisdiction, scales = "free_y", ncol = 3) +
labs(x = "Week of the Year",
y = "Total Deaths",
color = "Years",
title = "Weekly recorded deaths from all causes",
subtitle = "2020 data are for Weeks 1 to 35. Raw Counts, each state has its own y-axis scale.",
caption = "Graph: @kjhealy Data: CDC") +
theme_minimal() +
theme(legend.position = "top")
First we write some functions to draw and assemble the figure.
## First some functions to draw the plots
library(patchwork)
## Choose how many red-line wks
nwks <- 30
season_label <- tibble(wk_num = lubridate::epiweek(as.Date(c("2020-03-01",
"2020-06-01",
"2020-09-01",
"2020-12-01"))),
season_lab = c("Spring", "Summer", "Autumn", "Winter"))
order_panels <- function(st = state, ...) {
df %>%
filter(jurisdiction %in% st, cause != "All Cause") %>%
group_by(cause) %>%
summarize(deaths = sum(n, na.rm = TRUE),
.groups = "drop") %>%
mutate(cause_rank = rank(-deaths),
o = order(cause_rank),
cause_ord = factor(cause, levels = cause[o], ordered = TRUE)) %>%
select(cause, cause_ord)
}
## Count of all deaths
patch_state_count <- function(state) {
out <- df %>%
filter(jurisdiction %in% state, cause == "All Cause") %>%
group_by(year, week) %>%
mutate(yr_ind = year %in% 2020) %>%
filter(!(year == 2020 & week > nwks)) %>%
ggplot(aes(x = week, y = n, color = yr_ind, group = year)) +
geom_line(size = 0.9) +
scale_color_manual(values = c("gray70", "firebrick"), labels = c("2015-2019", "2020")) +
scale_x_continuous(breaks = season_label$wk_num, labels = season_label$season_lab) +
scale_y_continuous(labels = scales::comma) +
labs(x = NULL,
y = "Total Deaths",
color = "Years",
title = "Weekly recorded deaths from all causes",
subtitle = paste0("2020 data are for Weeks 1 to ", nwks, ". Raw Counts."))
out
}
## Line graphs by cause
patch_state_cause <- function(state) {
panel_ordering <- order_panels(st = state)
out <- df %>%
filter(jurisdiction == state,
cause %nin% c("All Cause", "COVID-19 Multiple cause", "COVID-19 Underlying")) %>%
group_by(cause, year, week) %>%
summarize(deaths = sum(n, na.rm = TRUE), .groups = "drop") %>%
mutate(yr_ind = year %in% 2020) %>%
filter(!(year == 2020 & week > nwks)) %>%
left_join(panel_ordering, by = "cause") %>%
ggplot(aes(x = week, y = deaths, color = yr_ind)) +
geom_line(size = 0.9, mapping = aes(group = year)) +
scale_color_manual(values = c("gray70", "firebrick"), labels = c("2015-2019", "2020")) +
scale_x_continuous(breaks = c(1, 10, 20, 30, 40, 50), labels = as.character(c(1, 10, 20, 30, 40, 50))) +
scale_y_continuous(labels = scales::comma) +
facet_wrap(~ cause_ord, ncol = 2, labeller = label_wrap_gen(25)) +
labs(x = "Week of the Year",
y = "Total Deaths",
color = "Years",
title = "Weekly deaths from selected causes",
subtitle = "Panels ordered by number of deaths. Raw Counts.")
out
}
## Column chart of departures from norm, by cause
patch_state_percent <- function(state){
panel_ordering <- order_panels(st = state)
out <- df %>%
filter(jurisdiction %in% state,
year == 2020,
cause %nin% c("All Cause", "COVID-19 Multiple cause",
"COVID-19 Underlying"), !is.na(pct_diff)) %>%
group_by(week) %>%
filter(!(week > nwks)) %>%
mutate(ov_un = pct_diff > 0) %>%
left_join(panel_ordering, by = "cause") %>%
ggplot(aes(x = week, y = pct_diff/100, fill = ov_un)) +
geom_col() +
scale_x_continuous(breaks = c(1, seq(10, nwks, 10)), labels = as.character(c(1, seq(10, nwks, 10)))) +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = c("gray40", "firebrick")) +
guides(fill = FALSE) +
facet_wrap(~ cause_ord, ncol = 2, labeller = label_wrap_gen(25)) +
labs(x = "Week of the Year",
y = "Percent",
title = "Percent difference from 2015-2019 average",
subtitle = paste0("Data for weeks 1 to ", nwks, " only."))
out
}
## Column chart of COVD-19 attributed deaths
patch_state_covid <- function(state) {
out <- df %>%
filter(jurisdiction %in% state, cause %in% c("COVID-19 Multiple cause")) %>%
group_by(year, week) %>%
mutate(yr_ind = year %in% 2020) %>%
filter(year == 2020) %>%
ggplot(aes(x = week, y = n, group = year)) +
geom_col(fill = "gray30") +
scale_x_continuous(breaks = c(1, 10, 20, 30, 40, 50),
labels = as.character(c(1, 10, 20, 30, 40, 50)),
limits = c(1, 52)) +
scale_y_continuous(labels = scales::comma) +
labs(x = "Week of the Year",
y = "Total Deaths",
color = "Years",
subtitle = "Raw counts.",
title = "Weekly deaths recorded as COVID-19 (Multiple cause)")
out
}
## Assemble the figure
make_patchplot <- function(state){
if(state == "New York") {
state_title <- paste(state, "(Excluding New York City)")
} else
{
state_title <- state
}
timestamp <- lubridate::stamp("March 1, 1999", "%B %d, %Y")(lubridate::ymd(Sys.Date()))
(patch_state_count(state) + theme(plot.margin = unit(c(5,0,0,0), "pt"))) / patch_state_covid(state) / (patch_state_cause(state) + (patch_state_percent(state))) +
plot_layout(heights = c(2, 0.5, 4), guides = 'collect') +
plot_annotation(
title = state_title,
caption = paste0("Graph: @kjhealy Data: CDC. This graph was made on ", timestamp, "."),
theme = theme(plot.title = element_text(size = rel(2), hjust = 0, face = "plain")))
}
Next we create a working dataset and a table of jurisdictions from nchs_wdc.
dat <- nchs_wdc %>%
filter(year > 2014) %>%
mutate(month_label = lubridate::month(week_ending_date, label = TRUE))
average_deaths <- nchs_wdc %>%
filter(year %in% c(2015:2019)) %>%
group_by(jurisdiction, week, cause) %>%
summarize(average_wk_deaths = mean(n, na.rm = TRUE))
df <- left_join(dat, average_deaths) %>%
select(everything(), n, average_wk_deaths) %>%
mutate(n_diff = n - average_wk_deaths,
pct_diff = (n_diff / n)*100) %>%
filter(cause %nin% c("Natural Causes", "Other"))
states <- nchs_wdc %>%
select(jurisdiction) %>%
unique() %>%
mutate(fname = tolower(paste0("figures/", jurisdiction, "_patch")),
fname = stringr::str_replace_all(fname, " ", "_"))
Now we can make a composite plot.
make_patchplot("United States")
make_patchplot("Connecticut")